Practical Lab 10
Vanilla CNN and Fine-Tune VGG16 - for Dogs and Cats Classification
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import os
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG16
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.models import Model
from sklearn.metrics import classification_report, confusion_matrix
from tensorflow.keras.preprocessing.image import load_img
# Load the data and preprocess the data
train_dir = r'C:\Users\Abdul\CSCN8010\data\kaggle_dogs_vs_cats_small\train'
test_dir = r'C:\Users\Abdul\CSCN8010\data\kaggle_dogs_vs_cats_small\test'

# Data preprocessing: rescale pixel values from [0, 255] to [0, 1].
train_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=(224, 224),  # VGG16's expected input size
    batch_size=32,
    class_mode='binary')

# Bug fix: shuffle=False is essential here. The evaluation section compares
# model.predict(test_generator) against test_generator.classes, and those only
# line up when the generator yields images in fixed directory order. With the
# default shuffle=True the comparison degenerates to ~50% accuracy, which is
# exactly what the captured evaluation output shows.
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=(224, 224),
    batch_size=32,
    class_mode='binary',
    shuffle=False)
Found 2000 images belonging to 2 classes. Found 2000 images belonging to 2 classes.
Exploratory Data Analysis (EDA)
# Class names are the sub-directory names under the training directory.
class_names = sorted(os.listdir(train_dir))

# Data Distribution
def _count_images_per_class(root):
    """Return {class_name: number of files} for each class sub-directory of root."""
    return {name: len(os.listdir(os.path.join(root, name))) for name in class_names}

train_class_counts = _count_images_per_class(train_dir)
test_class_counts = _count_images_per_class(test_dir)

print("Training Data Class Counts:")
print(train_class_counts)
print("\nTesting Data Class Counts:")
print(test_class_counts)
# Data Visualization
def plot_class_distribution(train_counts, test_counts):
    """Draw a grouped bar chart comparing train vs. test image counts per class."""
    labels = list(train_counts.keys())
    positions = np.arange(len(labels))
    bar_width = 0.35

    fig, axes = plt.subplots()
    # Two bar groups, offset left/right of each class tick.
    axes.bar(positions - bar_width / 2, list(train_counts.values()), bar_width, label='Train')
    axes.bar(positions + bar_width / 2, list(test_counts.values()), bar_width, label='Test')
    axes.set_ylabel('Count')
    axes.set_title('Class Distribution')
    axes.set_xticks(positions)
    axes.set_xticklabels(labels)
    axes.legend()
    plt.show()

plot_class_distribution(train_class_counts, test_class_counts)
# Data Visualization
def plot_sample_images(directory, class_names, num_samples=5):
    """Show the first num_samples images of every class in a grid.

    The grid has num_samples rows and one column per class, matching the
    tall figure size below.
    """
    plt.figure(figsize=(10, 5 * num_samples))
    for i, class_name in enumerate(class_names):
        class_dir = os.path.join(directory, class_name)
        sample_images = os.listdir(class_dir)[:num_samples]
        for j, image_name in enumerate(sample_images):
            image_path = os.path.join(class_dir, image_name)
            image = load_img(image_path, target_size=(224, 224))
            # Bug fix: for a (num_samples rows x len(class_names) cols) grid the
            # cell index is row * ncols + col + 1.  The original expression
            # i * num_samples + j + 1 interleaved the two classes within rows.
            plt.subplot(num_samples, len(class_names), j * len(class_names) + i + 1)
            plt.imshow(image)
            plt.title(class_name)
            plt.axis("off")
    plt.show()

print("\nSample Training Images:")
plot_sample_images(train_dir, class_names)
print("\nSample Testing Images:")
plot_sample_images(test_dir, class_names)
Training Data Class Counts:
{'cat': 1000, 'dog': 1000}
Testing Data Class Counts:
{'cat': 1000, 'dog': 1000}
Sample Training Images:
Sample Testing Images:
Neural Network
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

def create_custom_model(input_shape):
    """Build and compile a small 3-block vanilla CNN for binary classification."""
    model = Sequential()
    # Three conv/pool blocks with widening filter counts.
    model.add(Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))
    model.add(MaxPooling2D((2, 2)))
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Conv2D(128, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2)))
    # Classifier head: single sigmoid unit for the binary (cat/dog) output.
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model

custom_model = create_custom_model(input_shape=(224, 224, 3))
Fine-Tune VGG16 (pre-trained on ImageNet)
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.callbacks import ModelCheckpoint

# VGG16 convolutional base pre-trained on ImageNet, without its classifier head.
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Freeze every layer of the base so only the new head is trained.
for frozen_layer in base_model.layers:
    frozen_layer.trainable = False

# New binary-classification head on top of the frozen base.
head = Flatten()(base_model.output)
head = Dense(128, activation='relu')(head)
output_layer = Dense(1, activation='sigmoid')(head)

fine_tuned_model = Model(inputs=base_model.input, outputs=output_layer)
fine_tuned_model.compile(optimizer='adam',
                         loss='binary_crossentropy',
                         metrics=['accuracy'])

# Keep only the checkpoint with the best validation accuracy.
checkpoint = ModelCheckpoint(filepath='best_model_vgg16.h5',
                             monitor='val_accuracy',
                             save_best_only=True,
                             verbose=1)

history = fine_tuned_model.fit(train_generator,
                               epochs=10,
                               validation_data=test_generator,
                               callbacks=[checkpoint])
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5 58889256/58889256 [==============================] - 18s 0us/step Epoch 1/10 63/63 [==============================] - ETA: 0s - loss: 0.6750 - accuracy: 0.7685 Epoch 1: val_accuracy improved from -inf to 0.89100, saving model to best_model_vgg16.h5 63/63 [==============================] - 429s 7s/step - loss: 0.6750 - accuracy: 0.7685 - val_loss: 0.2639 - val_accuracy: 0.8910 Epoch 2/10 63/63 [==============================] - ETA: 0s - loss: 0.1912 - accuracy: 0.9335 Epoch 2: val_accuracy improved from 0.89100 to 0.90450, saving model to best_model_vgg16.h5 63/63 [==============================] - 471s 8s/step - loss: 0.1912 - accuracy: 0.9335 - val_loss: 0.2252 - val_accuracy: 0.9045 Epoch 3/10 63/63 [==============================] - ETA: 0s - loss: 0.1271 - accuracy: 0.9615 Epoch 3: val_accuracy improved from 0.90450 to 0.91350, saving model to best_model_vgg16.h5 63/63 [==============================] - 650s 10s/step - loss: 0.1271 - accuracy: 0.9615 - val_loss: 0.2091 - val_accuracy: 0.9135 Epoch 4/10 63/63 [==============================] - ETA: 0s - loss: 0.0747 - accuracy: 0.9845 Epoch 4: val_accuracy did not improve from 0.91350 63/63 [==============================] - 697s 11s/step - loss: 0.0747 - accuracy: 0.9845 - val_loss: 0.2051 - val_accuracy: 0.9110 Epoch 5/10 63/63 [==============================] - ETA: 0s - loss: 0.0629 - accuracy: 0.9835 Epoch 5: val_accuracy did not improve from 0.91350 63/63 [==============================] - 485s 8s/step - loss: 0.0629 - accuracy: 0.9835 - val_loss: 0.2233 - val_accuracy: 0.9050 Epoch 6/10 63/63 [==============================] - ETA: 0s - loss: 0.0366 - accuracy: 0.9960 Epoch 6: val_accuracy did not improve from 0.91350 63/63 [==============================] - 452s 7s/step - loss: 0.0366 - accuracy: 0.9960 - val_loss: 0.2108 - val_accuracy: 0.9120 Epoch 7/10 63/63 
[==============================] - ETA: 0s - loss: 0.0212 - accuracy: 0.9985 Epoch 7: val_accuracy did not improve from 0.91350 63/63 [==============================] - 508s 8s/step - loss: 0.0212 - accuracy: 0.9985 - val_loss: 0.2191 - val_accuracy: 0.9125 Epoch 8/10 63/63 [==============================] - ETA: 0s - loss: 0.0142 - accuracy: 1.0000 Epoch 8: val_accuracy did not improve from 0.91350 63/63 [==============================] - 515s 8s/step - loss: 0.0142 - accuracy: 1.0000 - val_loss: 0.2240 - val_accuracy: 0.9095 Epoch 9/10 63/63 [==============================] - ETA: 0s - loss: 0.0106 - accuracy: 1.0000 Epoch 9: val_accuracy did not improve from 0.91350 63/63 [==============================] - 521s 8s/step - loss: 0.0106 - accuracy: 1.0000 - val_loss: 0.2241 - val_accuracy: 0.9110 Epoch 10/10 63/63 [==============================] - ETA: 0s - loss: 0.0087 - accuracy: 1.0000 Epoch 10: val_accuracy did not improve from 0.91350 63/63 [==============================] - 523s 8s/step - loss: 0.0087 - accuracy: 1.0000 - val_loss: 0.2293 - val_accuracy: 0.9115
# Checkpoint callback that keeps the snapshot with the highest validation accuracy.
checkpoint_custom = ModelCheckpoint(filepath='best_model_custom.h5',
                                    monitor='val_accuracy',
                                    save_best_only=True,
                                    verbose=1)

# Train the vanilla CNN on the same data and schedule as the VGG16 run.
history_custom = custom_model.fit(train_generator,
                                  validation_data=test_generator,
                                  epochs=10,
                                  callbacks=[checkpoint_custom])
Epoch 1/10 63/63 [==============================] - ETA: 0s - loss: 0.7665 - accuracy: 0.5095 Epoch 1: val_accuracy improved from -inf to 0.51150, saving model to best_model_custom.h5 63/63 [==============================] - 68s 1s/step - loss: 0.7665 - accuracy: 0.5095 - val_loss: 0.6896 - val_accuracy: 0.5115 Epoch 2/10 63/63 [==============================] - ETA: 0s - loss: 0.6620 - accuracy: 0.5950 Epoch 2: val_accuracy improved from 0.51150 to 0.64600, saving model to best_model_custom.h5 63/63 [==============================] - 69s 1s/step - loss: 0.6620 - accuracy: 0.5950 - val_loss: 0.6264 - val_accuracy: 0.6460 Epoch 3/10 63/63 [==============================] - ETA: 0s - loss: 0.6041 - accuracy: 0.6710 Epoch 3: val_accuracy improved from 0.64600 to 0.69650, saving model to best_model_custom.h5 63/63 [==============================] - 76s 1s/step - loss: 0.6041 - accuracy: 0.6710 - val_loss: 0.5996 - val_accuracy: 0.6965 Epoch 4/10 63/63 [==============================] - ETA: 0s - loss: 0.4857 - accuracy: 0.7655 Epoch 4: val_accuracy improved from 0.69650 to 0.72150, saving model to best_model_custom.h5 63/63 [==============================] - 72s 1s/step - loss: 0.4857 - accuracy: 0.7655 - val_loss: 0.5735 - val_accuracy: 0.7215 Epoch 5/10 63/63 [==============================] - ETA: 0s - loss: 0.3982 - accuracy: 0.8195 Epoch 5: val_accuracy did not improve from 0.72150 63/63 [==============================] - 69s 1s/step - loss: 0.3982 - accuracy: 0.8195 - val_loss: 0.5616 - val_accuracy: 0.7135 Epoch 6/10 63/63 [==============================] - ETA: 0s - loss: 0.2935 - accuracy: 0.8740 Epoch 6: val_accuracy did not improve from 0.72150 63/63 [==============================] - 71s 1s/step - loss: 0.2935 - accuracy: 0.8740 - val_loss: 0.6717 - val_accuracy: 0.7150 Epoch 7/10 63/63 [==============================] - ETA: 0s - loss: 0.1736 - accuracy: 0.9265 Epoch 7: val_accuracy did not improve from 0.72150 63/63 [==============================] - 76s 
1s/step - loss: 0.1736 - accuracy: 0.9265 - val_loss: 0.7933 - val_accuracy: 0.6990 Epoch 8/10 63/63 [==============================] - ETA: 0s - loss: 0.1063 - accuracy: 0.9645 Epoch 8: val_accuracy did not improve from 0.72150 63/63 [==============================] - 75s 1s/step - loss: 0.1063 - accuracy: 0.9645 - val_loss: 0.9146 - val_accuracy: 0.6960 Epoch 9/10 63/63 [==============================] - ETA: 0s - loss: 0.0452 - accuracy: 0.9880 Epoch 9: val_accuracy did not improve from 0.72150 63/63 [==============================] - 74s 1s/step - loss: 0.0452 - accuracy: 0.9880 - val_loss: 1.3874 - val_accuracy: 0.7005 Epoch 10/10 63/63 [==============================] - ETA: 0s - loss: 0.0222 - accuracy: 0.9915 Epoch 10: val_accuracy did not improve from 0.72150 63/63 [==============================] - 74s 1s/step - loss: 0.0222 - accuracy: 0.9915 - val_loss: 1.3555 - val_accuracy: 0.6995
Relative performance of the models
# Bug fix: accuracy_score and load_model were used below but never imported,
# so this cell raised NameError as originally written.
from sklearn.metrics import precision_recall_curve, accuracy_score
from tensorflow.keras.models import load_model

# Reload the best (highest val_accuracy) checkpoints saved during training.
custom_model_best = load_model('best_model_custom.h5')
fine_tuned_model_best = load_model('best_model_vgg16.h5')

# Predicted probabilities, flattened from (N, 1) to (N,).
# NOTE(review): test_generator must have been created with shuffle=False,
# otherwise test_generator.classes does not line up with prediction order
# and every metric below collapses to chance level.
custom_model_predictions = custom_model_best.predict(test_generator).flatten()
custom_model_accuracy = accuracy_score(test_generator.classes, custom_model_predictions.round())
custom_model_confusion_matrix = confusion_matrix(test_generator.classes, custom_model_predictions.round())
custom_model_classification_report = classification_report(test_generator.classes, custom_model_predictions.round())

fine_tuned_model_predictions = fine_tuned_model_best.predict(test_generator).flatten()
fine_tuned_model_accuracy = accuracy_score(test_generator.classes, fine_tuned_model_predictions.round())
fine_tuned_model_confusion_matrix = confusion_matrix(test_generator.classes, fine_tuned_model_predictions.round())
fine_tuned_model_classification_report = classification_report(test_generator.classes, fine_tuned_model_predictions.round())

# Precision-recall curves from the raw probabilities (not the rounded labels).
precision_custom, recall_custom, _ = precision_recall_curve(test_generator.classes, custom_model_predictions)
precision_vgg16, recall_vgg16, _ = precision_recall_curve(test_generator.classes, fine_tuned_model_predictions)

# Bug fix: the VGG16 curve was computed but never plotted; show both models.
plt.plot(recall_custom, precision_custom, label='Custom Model')
plt.plot(recall_vgg16, precision_vgg16, label='Fine-Tuned VGG16')
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.title('Precision-Recall Curve')
plt.legend()
plt.show()

print("Custom Model:")
print("Accuracy:", custom_model_accuracy)
print("Confusion Matrix:\n", custom_model_confusion_matrix)
print("Classification Report:\n", custom_model_classification_report)
print("\nFine-Tuned VGG16 Model:")
print("Accuracy:", fine_tuned_model_accuracy)
print("Confusion Matrix:\n", fine_tuned_model_confusion_matrix)
print("Classification Report:\n", fine_tuned_model_classification_report)
63/63 [==============================] - 12s 183ms/step 63/63 [==============================] - 252s 4s/step
Custom Model:
Accuracy: 0.5125
Confusion Matrix:
[[493 507]
[468 532]]
Classification Report:
precision recall f1-score support
0 0.51 0.49 0.50 1000
1 0.51 0.53 0.52 1000
accuracy 0.51 2000
macro avg 0.51 0.51 0.51 2000
weighted avg 0.51 0.51 0.51 2000
Fine-Tuned VGG16 Model:
Accuracy: 0.5045
Confusion Matrix:
[[491 509]
[482 518]]
Classification Report:
precision recall f1-score support
0 0.50 0.49 0.50 1000
1 0.50 0.52 0.51 1000
accuracy 0.50 2000
macro avg 0.50 0.50 0.50 2000
weighted avg 0.50 0.50 0.50 2000
Conclusions: During training the fine-tuned VGG16 reached 91.35% validation accuracy versus 72.15% for the vanilla CNN, yet the final evaluation reports roughly 50% test accuracy for both models. This contradiction indicates the evaluation is broken, not the models: predictions from `predict(test_generator)` are compared against `test_generator.classes` while the generator shuffles batches (the default), so labels and predictions are misaligned. Recreating the test generator with `shuffle=False` and re-running the evaluation should reproduce accuracies consistent with the validation results, with VGG16 clearly outperforming the vanilla CNN.